/***************************************************************
                       sulston.c
serial and shared for calculating score
Equation 2 (Mott) is also in here
NULL parameters are used for serial. Non-NULL for shared
**************************************************************/
#include <stdio.h>
#include <malloc.h>
#include <float.h>
#include <math.h>
#include <assert.h>
#include <netinet/in.h>
#include "clam.h"

#define FAST_SUL_ITERATIONS 3

#define graphQuery messQuery

void CpMdisplay(), ZautoCpM();
int  ZscoreCpM();
BOOL ZboolCpM();

static CLONE my_clone;
extern void scanPzA(), scanPzB(), scanPz2(), EvalCtgDisplay(), CBCtgDisplay(), ClamCtgDisplay2();
extern void ZclearHigh(), ZhighCin();
extern int Zget_tol();
extern void ClamShare();
extern int NoPop;
extern int validDistribution;
extern float *distrib;
extern int get_precomp_value(int b1, int b2, int m, double* result);
extern int Zbcm(int b1, int diff);

BOOL ZboolCpM();
int Zcnt_shared_markers();

void Zgetmatch(struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result);
int Zcnt_shared_markers(struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result);
void Zmott(struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result);

/*************************************************************
                     CONTIGC
Finds the probability of coincidence
C(n, k) = (N over K) = bionomial coeffient = N!/K!(N-K)!

sum of N to nbandL C(nbandL, N) * (1 - p)^N * p^(nbandL-N)
where p = (1 - (2*tol/gel-len)) ^ nbandH

**************************************************************/
#define abs(x) ((x) >= 0 ? (x) : -(x))

/***********************************************************
                 DEF: Zmott
***********************************************************/
void Zmott(struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result)
{
   double bin, b, lp1, lp2, p1, p2, p, tol, tol1, pt;

    p1 = (double) clone1->nbands / (double) Proj.gel_len;
    p2 = (double) clone2->nbands / (double) Proj.gel_len;
    p = p1*p2;
    lp1 = log(1.0-p1);
    lp2 = log(1.0-p2);
    tol = (double) Pz.tol;
    tol1 = (double) Pz.tol+1;
    pt = -p + p1*(1.0 - exp(tol1*lp2))*(exp(tol*lp2)) +
              p2*(1.0 - exp(tol1*lp1))*(exp(tol*lp1));
    if (validDistribution)
    {
        b = (double) result->newMatch;
    }
    else
    {
         b = (double) result->match;
    }

    bin = (double) Proj.gel_len;

    result->prob = (double)
      exp(b*log(pt) + (bin-b)*log(1.0-pt) +
      lgamma(bin+1.0) -lgamma(b+1.0) - lgamma(bin-b+1.0));

}

/***********************************************************
                 DEF: fast_sulston_shared
*************************************************************/
static void fast_sulston (struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result)
{
   register int i, k, n, nbH, nbL;
   double a, c, pp, pa, pb;
   static int first_time=1;
   static double lim, Logfact[NBANDS+1], psmn=0.0;

    if (first_time)
    {
       Logfact[0] = 0.0;
       Logfact[1] = 0.0;
       for (i = 2; i <= NBANDS; ++i)
         Logfact[i] = Logfact[i - 1] + log((double) i);
       first_time=0;
       lim = log(DBL_MIN);
    }

    if (Pz.tol == 0)  psmn = 1 - (double) 1.0 / Proj.gel_len;
    else  psmn = 1 - (double) (Pz.tol << 1) / Proj.gel_len;

    nbL = MiN(clone1->nbands,clone2->nbands);
    nbH = MaX(clone1->nbands,clone2->nbands);

    result->prob = DBL_MIN;

    pp = (double) pow(psmn, nbH);
    pa = log(pp);
    pb = log(1.0 - pp);

    /* 18june00 starting at nbl 64, nhl 74, if the match is zero,
            this optimization causes the prob to be below cutoff */

    if (result->match==0) result->prob =1.0;
    else {
      for (k=0, n = result->match; n <= nbL && k < FAST_SUL_ITERATIONS; n++, k++)
      {
            c = Logfact[nbL] - Logfact[nbL - n] - Logfact[n];
            a = c + n * pb + (nbL - n) * pa;
            result->prob +=  (a >= lim) ? exp(a) : exp(lim);
       }
    }

}

/***********************************************************
                 DEF: pure_sulston
*************************************************************/
static void pure_sulston(struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result)
{
    register int i, n, nbH, nbL;
    double a, c, pp, pa, pb;
    static int first_time=1;
    static double lim,  Logfact[NBANDS+1], psmn=0.0;

    if (first_time) {
       Logfact[0] = 0.0;
       Logfact[1] = 0.0;
       for (i = 2; i <= NBANDS; ++i)
         Logfact[i] = Logfact[i - 1] + log((double) i);
       first_time=0;
       lim = log(DBL_MIN);
    }

    if (Pz.tol == 0)  psmn = 1 - (double) 1.0 / Proj.gel_len;
    else  psmn = 1 - (double) (Pz.tol << 1) / Proj.gel_len;

    nbL = MiN(clone1->nbands,clone2->nbands);
    nbH = MaX(clone1->nbands,clone2->nbands);

    result->prob = DBL_MIN;

    pp = (double) pow(psmn, nbH);
    pa = log(pp);
    pb = log(1.0 - pp);

    for (n = result->match; n <= nbL; n++)
    {
       c = Logfact[nbL] - Logfact[nbL - n] - Logfact[n];
       a = c + n * pb + (nbL - n) * pa;
       result->prob +=  (a >= lim) ? exp(a) : exp(lim);
    }
}

/***********************************************************
                 DEF: Zsulston
*************************************************************/
void Zsulston (struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result)

{
    Zgetmatch (clone1,clone2,result);

    if (0 == get_precomp_value(clone1->nbands, clone2->nbands, result->match, &result->prob))
    {
        if (Proj.eq2==1) Zmott(clone1,clone2,result);
        else if (Proj.eq2==2 || clone1->nbands >=60 || clone2->nbands >= 60)
             pure_sulston(clone1,clone2,result);
        else fast_sulston(clone1,clone2,result);
    }

}


void choose_sulston(struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result)
{
    if (Proj.eq2 == 1)
    {
       Zmott(clone1,clone2,result);
    }
    else if (Proj.eq2==2 || clone1->nbands >=60 || clone2->nbands >= 60)
    {
         pure_sulston(clone1,clone2,result);
    }
    else
    {
         fast_sulston(clone1,clone2,result);
    }
}

/*********************************************************************
                    DEF: Zgetmatch

This accepts two clones and computes the number of matching bands
which it stores in result->

*******************************************************************/
void Zgetmatch(struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result)
{
  int kbnd, k, l, idiff, lstart;

  result->total = result->match = result->newMatch = 0;
  lstart = 0;
  for (k = 0; k < clone1->nbands; ++k)
  {
      kbnd = clone1->coords[k];
      for (l = lstart; l < clone2->nbands; ++l)
      {
          idiff = kbnd - clone2->coords[l];
          if (Ztol(kbnd, idiff))
          {
             result->match++;
             if (validDistribution) {
                 result->newMatch += (distrib[kbnd] +
                                  distrib[kbnd-idiff])/2;
                  result->match = result->newMatch;
             }
             lstart = l + 1;
             break;
          }
          else if (idiff < 0)
          {
              lstart = l;
              break;
          }
       }
   }

  return;
}

/**************************************************************
            DE: ZscoreCpM
All clam routines call this for scoreing (Zsulston or Zmott must be called
before this).
**************************************************************/
int ZscoreCpM(struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result)
{
  int junk, exponent=0;
  char str[20];
  register int i;
  int retval = 0;

   if (result->prob < Pz.cutFlt)
   {
       CpMTbl[0].cnt++;
       sprintf(str,"%.0e", result->prob);

       sscanf(str,"%de-%d",&junk, &exponent);

       if (exponent==0) {
           printf("Clone1 %i\tScore%.0e\tScore%f\n",
                  clone1->nbands,result->prob,result->prob);
           printf("exp = 0\n");
       }

       retval = exponent;
   }
   else if (Cp.useFlag != 0 &&  result->prob < CpM[MAX_CpM-1].cut) {
        Zcnt_shared_markers(clone1,clone2,result);
        if (result->mark > 0)   {
           for (i=1; i<MAX_CpM; i++) {
               if (result->prob < CpM[i].cut && result->mark >= CpM[i].nm)
               {
                  CpMTbl[i].cnt++;
                  sprintf(str,"%.0e", result->prob);
                  sscanf(str,"%de-%d",&junk, &exponent);
                  if (exponent==0) printf("exp = 0\n");
                  retval = exponent;
                  break;
               }
           }
       }
   }

   return retval;
}

/******************************************************************
                 DEF: Zcnt_shared_markers()
*****************************************************************/
int Zcnt_shared_markers(struct tmpCz* clone1, struct tmpCz* clone2, struct tmpSz* result)
{
  struct marker *markerptr;
  struct markertop *mk1, *mk2;
  CLONE *c1, *c2;
  int score=0;

   c1 = arrp(acedata, clone1->cin, CLONE);
   c2 = arrp(acedata, clone2->cin, CLONE);
   for (mk1 = c1->marker; mk1 != NULL; mk1 = mk1->nextmarker)
   {
        markerptr = arrp(markerdata, mk1->markerindex, MARKER);
        if(!Cp.useYBP &&
           (markerptr->type == markYAC || markerptr->type == markBAC ||
                                markerptr->type == markPAC)) continue;
        if(!Cp.usePCR && markerptr->type == markPCR) continue;
        if(!Cp.useREP && markerptr->type == markREP) continue;

        for (mk2 = c2->marker; mk2 != NULL; mk2 = mk2->nextmarker)
            if (mk1->markerindex == mk2->markerindex) {
               score++;
               break;
            }
   }
   result->mark = score;

return score;
}

/*********************************************************8
                    DEF: loadCzfast
**********************************************************/

int loadCzfast(Cx, cin)
struct tmpCz *Cx;
int cin;
{
register int i, start, band, j, b;
CLONE *clp;

  Sz.match = Sz.olap = Sz.newMatch = 0;
  Sz.prob = 0.0;
  clp = arrp(acedata,cin,CLONE);
  if (clp->fp == NULL) return 0;
  if (clp->fp->b2 == 0) return 0;

  start = clp->fp->b1;
  band = clp->fp->b2;
  Cx->cin = cin;
  for (j=i=0; i < band; i++) {
     b = arr(bands, start + i - 1, int);
     if (b > Pz.minVal && b < Pz.maxVal)
        Cx->coords[j++] = b;
  }
  Cx->nbands = j;

return 1;
}

/*********************************************************8
                    DEF: loadCz
**********************************************************/

int loadCz(Cx, cin)
struct tmpCz *Cx;
int cin;
{
register int i, off, b, j;

  Sz.msg1[0] = Sz.msg2[0] = '\0';
  Sz.match = Sz.olap = 0;
  Sz.prob = 0.0;

  my_clone = arr(acedata,cin,CLONE);
  if (my_clone.fp == NULL) return 0;
  if (my_clone.fp->b2 == 0) return 0; // FIX 28may99

  if (my_clone.mattype==PARENT) strcpy(Cx->parent," canon");
  else if (my_clone.mattype==PSPARENT) strcpy(Cx->parent," canon+");
  else strcpy(Cx->parent, my_clone.match);

  if (my_clone.mattype==EXACT) strcpy(Cx->btype,"exact ");
  else if (my_clone.mattype==APPROX) strcpy(Cx->btype,"approx");
  else if (my_clone.mattype==PSEUDO) strcpy(Cx->btype,"pseudo");
  else strcpy(Cx->btype,"      ");

  strcpy(Cx->clone,my_clone.clone);
  strcpy(Cx->fpnum,my_clone.fp->fpchar);

  Cx->left = my_clone.x;
  Cx->right = my_clone.y;
  Cx->len = ( Cx->right - Cx->left);
  Cx->ctg = my_clone.ctg;
  Cx->cin = cin; // cari 2/4/4

  if (merging) {
     if (Cx->ctg == mergecontig2) {
         off = (contigs[mergecontig1].right -
                contigs[mergecontig2].left) + startpt;
         Cx->left += off;
         Cx->right += off;
     }
  }

  Cx->start = my_clone.fp->b1;

  if (my_clone.fp->b2 > NBANDS)
        printf("Bad number of bands %s %d\n",Cx->clone,
         my_clone.fp->b2);

   for (j=i=0; i < my_clone.fp->b2 && i < NBANDS; i++) {
      b = arr(bands, Cx->start + i - 1, int);
      if (b > Pz.minVal && b < Pz.maxVal)
         Cx->coords[j++] = b;
   }
   Cx->nbands = j;
return 1;
}
/*********************************************************8
                    DEF: loadSizeCz
**********************************************************/

int
loadSizeCz(struct tmpCz *Cx, int cin)
{
    CLONE *clone;
    int tmp[NBANDS];
    FILE *bandsFilePtr;
    char bandsFileName[1024];
    char name[CLONE_SZ], clonename[CLONE_SZ];
    char line[100];
    int  temp;
    int i,  nbands;

    if (Proj.variable)
        return 1; // already have size
    if (Pz.useSz <= 0)
        return 0;

    clone = arrp(acedata, cin, CLONE);

    /* CAS 3sept05 use Fp Name if exists */
    if (clone->fp != NULL && clone->fp->fpchar[0] != ' ')
        strcpy(clonename, clone->fp->fpchar);
    else
        strcpy(clonename, clone->clone);

    if (strlen(dirName) > 0)
        sprintf(bandsFileName, "%s/Sizes/%s.sizes",
                dirName, clone->fp->gelname);
    else
        sprintf(bandsFileName, "Sizes/%s.sizes",  clone->fp->gelname);

    if ((bandsFilePtr = fopen(bandsFileName, "r")) == NULL)  {
        printf("Warning: could not find %s\n", bandsFileName);
        return 0;
    }
    nbands = 0;
    while (1) {
        if (!fgets(line, 80, bandsFilePtr)) {
            printf("Warning: could not find %s (%s) in %s\n",
                   clone->clone, clonename, bandsFileName);
            fclose(bandsFilePtr);
            return 0;
        }
        sscanf(line, "%s", name);
        if (strcmp(name,clonename) != 0)
            continue;
        fscanf(bandsFilePtr, "%i", &temp);
        while (temp != -1) {
            tmp[nbands++] = temp;
            fscanf(bandsFilePtr, "%i", &temp);
        }
        tmp[nbands] = -1;
        break;
    }
    for (i = 0; i < nbands; i++)
        Cx->coords[i] = tmp[i];
    Cx->nbands = nbands;
    fclose(bandsFilePtr);
    return 1;
}

/********************************************************************
                   outputting stats
********************************************************************/
/*********************************************
         DEF: print_CpMcnts()
Called after incremental build
*********************************************/
void print_CpMcnts()
{
  if (Cp.useFlag==0) { ZBuf[0] = '\0';
  }
  else {
    sprintf(ZBuf,
      "CpM counts: %d (%d %1.0e) %d (%d %1.0e) %d (%d %1.0e)\n",
        CpMTbl[1].cnt, CpM[1].nm, CpM[1].cut,
        CpMTbl[2].cnt, CpM[2].nm, CpM[2].cut,
        CpMTbl[3].cnt,CpM[3].nm, CpM[3].cut);
  }
}
